#-----------------------------------------------------------------------------
#
# County-level descriptive results for Callaway and Li (2023)
#
# This code reproduces Figures S-2, S-4, and S-5 in the Supplementary
# Appendix.
# 
#
# Authors: Brantly Callaway and Tong Li
# Date:    2/8/2023
#
#-----------------------------------------------------------------------------


library(ggplot2)
library(ggrepel)
library(ggalt)

load("southeast_covid_data.RData")

dta <- southeast_covid_data
rm(southeast_covid_data)

start_date <- as.Date("2020-03-16")
end_date <- as.Date("2020-05-10")
policy_date <- as.Date("2020-04-01")

dta <- subset(dta, state=="Tennessee")

# counties to emphasize in plots
big.counties <- c("Williamson","Knox","Shelby","Davidson")
# counties to drop in plots
high.counties <- c("Trousdale","Bledsoe","Lake")



plot.dta <- subset(dta, !(county %in% high.counties))
# for labels in graphs
plot.dta$county.label <- as.character(plot.dta$county)
plot.dta$county.label[ plot.dta$date < (end_date - 1)]  <- ""
plot.dta$county.label[ ! (plot.dta$county %in% big.counties) ]  <- "" 

plot.dta$county.color <- "others"
plot.dta$county.color[ plot.dta$county %in% big.counties ]  <- plot.dta$county[plot.dta$county %in% big.counties]


dta1 <- subset(plot.dta, county %in% big.counties)
dta2 <- subset(plot.dta, !(county %in% big.counties))

make_county_plot <- function(var, ylabel, ylimit=NULL, save_plot=FALSE) {
  p <- ggplot(dta2, aes(x=date, y=.data[[var]], group=county, color=county.color)) +
    geom_line(size=0.8) + 
    #geom_vline(xintercept=policy_date,size=1.1) +
    ylab(ylabel) +
    xlab("Date") +
    xlim((start_date+1), (end_date-1)) + 
    theme_bw() +
    geom_line(data=dta1, size=1.4) +
    scale_color_manual(values=c("purple", "blue","lightgray","red","green")) +
    geom_label_repel(data=dta1, aes(label = county.label),
                     nudge_x = 0,
                     na.rm = TRUE, max.overlaps=50) +
    theme(legend.position="none") +
    scale_x_date(breaks=seq(start_date+5,end_date,by=7), date_labels="%b %d", limits=c(start_date+1, end_date-1))
  if (!is.null(ylimit)) {
    p <- p + ylim(ylimit)
  }
  print(p)
  if (save_plot) {
    ggsave(paste0("county-",var,".pdf"), width=6, height=3.5)
  }
}

save_plot <- FALSE

# produces Figure S-2, panel (a)
make_county_plot("cum_tests.pc", "Cumulative Tests per 1000 People", save_plot=save_plot)

# produces Figure S-2, panel (b)
make_county_plot("tests.pc.ma", "7 Day Moving Avg. Tests per 1000 People", save_plot=save_plot, ylimit=c(0,5))

# produces Figure S-2, panel (c)
make_county_plot("cum_cases.pc", "Cumulative Confirmed Cases per 1000 People", save_plot=save_plot)

# produces Figure S-2, panel (d)
make_county_plot("cases.pc.ma", "7 Day Moving Avg. Confirmed Cases per 1000 People", save_plot=save_plot, ylimit=c(0,0.3))

# produces Figure S-4, panel (a) 
make_county_plot("cum_deaths.pc", "Cumulative Deaths per 1000 People", save_plot=save_plot, ylimit=c(0,0.3))

# produces Figure S-4, panel (b)
make_county_plot("deaths.pc.ma", "7 Day Moving Avg. Deaths per 1000 People", save_plot=save_plot, ylimit=c(0,0.02))




#-----------------------------------------------------------------------------
# code to produce bounds in Figure S-5
#-----------------------------------------------------------------------------

this_date <- as.Date("2020-03-31")
# this_date <- as.Date("2020-04-25")

this_state <- "Tennessee"
# this_state <- "Alabama"

load("southeast_covid_data.RData")

this_data.dta <- subset(southeast_covid_data, date==this_date & state==this_state)
this_data.dta <- subset(this_data.dta, cum_cases.pc > 0)

# terms that bounds depend on 
p.R <- this_data.dta$cum_cases.pc/1000
p.T <- this_data.dta$cum_tests.pc/1000
p.RcondT1 <- this_data.dta$cum_cases.pc/this_data.dta$cum_tests.pc

# false negative rate of test
false.neg <- 0.25

gam <- p.R/(1-false.neg)
p.CcondT1 <- p.RcondT1/(1-false.neg)

Lower <- gam
# bound without any assumptions
Upper <- gam + (1-p.T)

# bound under assumption 1 in paper
Upper2 <- gam + p.CcondT1*(1-p.T)


# store results in data frame for plotting
this_data.dta$Upper <- Upper2
this_data.dta$Lower <- Lower



#-----------------------------------------------------------------------------
# make the plots
#-----------------------------------------------------------------------------

# make plot alphabetical
this_data.dta$county <- droplevels(as.factor(this_data.dta$county))
olevels <- levels(this_data.dta$county)
this_data.dta$county <- factor(this_data.dta$county, levels=olevels[-order(olevels)+length(olevels)+1])


tn_color <- "#FF8200"
al_color <- "#828A8f"
dumbbell_color <- ifelse(this_state=="Tennessee", tn_color, al_color)

ggplot(this_data.dta, aes(x=Lower, xend=Upper, y=county, group=county)) +
  geom_dumbbell(color=dumbbell_color) +
  xlab("Infection Rate") +
  ylab("County") +
  xlim(c(0,1)) +
  theme_bw()

